{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook is for experimenting with different parameters to train a ridge regression model on the Diabetes dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Change out of the experimentation directory\n",
    "%cd .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import azureml.core\n",
    "from azureml.core import Workspace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the workspace from the saved config file\n",
    "ws = Workspace.from_config()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, shutil\n",
    "\n",
    "# Create a folder for the experiment files\n",
    "training_folder = 'diabetes-training'\n",
    "os.makedirs(training_folder, exist_ok=True)\n",
    "\n",
    "# Copy the data file into the experiment folder\n",
    "shutil.copy('data/diabetes.csv', os.path.join(training_folder, \"diabetes.csv\"))\n",
    "\n",
    "# Copy the train functions into the experiment folder\n",
    "shutil.copy('diabetes_regression/training/train.py', os.path.join(training_folder, \"train.py\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile $training_folder/parameters.json\n",
    "{\n",
    "    \"training\":\n",
    "    {\n",
    "        \"alpha\": 0.3\n",
    "    },\n",
    "    \"evaluation\":\n",
    "    {\n",
    "\n",
    "    },\n",
    "    \"scoring\":\n",
    "    {\n",
    "        \n",
    "    }\n",
    "}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%writefile $training_folder/diabetes_training.py\n",
    "# Import libraries\n",
    "from azureml.core import Run\n",
    "import json\n",
    "import os\n",
    "import pandas as pd\n",
    "import shutil\n",
    "\n",
    "from train import split_data, train_model\n",
    "\n",
    "# Get the experiment run context\n",
    "run = Run.get_context()\n",
    "\n",
    "# load the diabetes dataset\n",
    "print(\"Loading Data...\")\n",
    "train_df = pd.read_csv('diabetes.csv')\n",
    "\n",
    "data = split_data(train_df)\n",
    "\n",
    "# Specify the parameters to test\n",
    "with open(\"parameters.json\") as f:\n",
    "    pars = json.load(f)\n",
    "    train_args = pars[\"training\"]\n",
    "\n",
    "# Log parameters\n",
    "for k, v in train_args.items():\n",
    "    run.log(k, v)\n",
    "\n",
    "model, metrics = train_model(data, train_args)\n",
    "\n",
    "# Log metrics\n",
    "for k, v in metrics.items():\n",
    "    run.log(k, v)\n",
    "\n",
    "# Save the parameters file to the outputs folder\n",
    "os.makedirs('outputs', exist_ok=True)\n",
    "shutil.copy('parameters.json', os.path.join('outputs', 'parameters.json'))\n",
    "    \n",
    "run.complete()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from azureml.train.estimator import Estimator\n",
    "from azureml.core import Experiment\n",
    "\n",
    "# Create an estimator\n",
    "estimator = Estimator(source_directory=training_folder,\n",
    "                      entry_script='diabetes_training.py',\n",
    "                      compute_target='local',\n",
    "                      conda_packages=['scikit-learn']\n",
    "                      )\n",
    "\n",
    "# Create an experiment\n",
    "experiment_name = 'diabetes-training'\n",
    "experiment = Experiment(workspace = ws, name = experiment_name)\n",
    "\n",
    "# Run the experiment based on the estimator\n",
    "run = experiment.submit(config=estimator)\n",
    "run.wait_for_completion(show_output=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "metrics = run.get_metrics()\n",
    "for k, v in metrics.items():\n",
    "        print(k, v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for file in run.get_file_names():\n",
    "    print(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.6.10 64-bit ('OH3': conda)",
   "language": "python",
   "name": "python361064bitoh3conda5f7beeba8c1d407187c86667ecfb684f"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
